{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Langchain"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Code from https://python.langchain.com/docs/use_cases/question_answering/local_retrieval_qa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install langchain\n",
    "!pip install gpt4all\n",
    "!pip install chromadb\n",
    "!pip install llama-cpp-python\n",
    "!pip install langchainhub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import WebBaseLoader\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "from langchain_community.embeddings import GPT4AllEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_community.llms import LlamaCpp\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import PromptTemplate\n",
    "from langchain.docstore.document import Document\n",
    "from langchain import hub\n",
    "from langchain_core.runnables import RunnablePassthrough, RunnablePick\n",
    "import pandas as pd\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load custom dataset\n",
    "with open(\"all_mail_contents.txt\", \"r\", encoding=\"utf-8\") as f:\n",
    "\tall_mail_contents = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# convert to langchain document format\n",
    "doc =  Document(page_content=all_mail_contents, metadata={\"source\": \"local\"})\n",
    "#split up\n",
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)\n",
    "all_splits = text_splitter.split_documents([doc])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_gpu_layers = 1  # Metal set to 1 is enough.\n",
    "n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
    "\n",
    "# Make sure the model path is correct for your system!\n",
    "llm = LlamaCpp(\n",
    "    model_path=r\"E:\\AllProgramming\\Llama2\\llama.cpp\\llama-2-7b-chat\\ggml-model-f16_q4_1.gguf\",\n",
    "    # model_path=r\"E:\\AllProgramming\\Llama2\\llama.cpp\\llama-2-7b-chat\\ggml-model-f16_q4_1.gguf\",\n",
    "    # model_path=r\"E:\\AllProgramming\\Llama2\\llama.cpp\\llama-2-13b-chat\\ggml-model-f16.gguf\",\n",
    "    n_gpu_layers=n_gpu_layers,\n",
    "    n_batch=n_batch,\n",
    "    # n_ctx=2048,\n",
    "    n_ctx=1024,\n",
    "    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls\n",
    "    verbose=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"What can you do on Medium?\"\n",
    "llm.invoke(question) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# #this code can be used to see if the correct documents are retrieved. The documents retrieved should be regarding your questions, and is the data the LLM uses to answer the questions\n",
    "# question = \"Medium\"\n",
    "# docs = vectorstore.similarity_search(question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_docs(docs):\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# retrieve relevant docs\n",
    "\n",
    "rag_prompt = hub.pull(\"rlm/rag-prompt\")\n",
    "rag_prompt.messages\n",
    "\n",
    "retriever = vectorstore.as_retriever()\n",
    "qa_chain = (\n",
    "    {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
    "    | rag_prompt\n",
    "    | llm\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"What is the topic of my last Microsoft email I have gotten? Answer in detail\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa_chain.invoke(question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#try llama file #TODO "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.llms import GPT4All\n",
    "\n",
    "llm = GPT4All(\n",
    "    model=r\"C:\\Users\\eivin\\Documents\\Programming\\RAG_testing\\tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf\",\n",
    "    max_tokens=2048,    \n",
    ")\n",
    "llm.invoke(\"Can you answer questions?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.llms import GPT4All\n",
    "\n",
    "llm = GPT4All(\n",
    "    model=r\"all-MiniLM-L6-v2-f16.gguf\",\n",
    "    max_tokens=2048,\n",
    ")\n",
    "# llm.invoke(\"Can i ask question?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.llms import GPT4All\n",
    "\n",
    "llm = GPT4All(\n",
    "    model=\"mistral-7b-instruct-v0.1.Q4_0.gguf\",\n",
    "    max_tokens=2048,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "local_path = \"mistral-7b-instruct-v0.1.Q4_0.gguf\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
    "from langchain.chains import LLMChain\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain_community.llms import GPT4All\n",
    "\n",
    "\n",
    "# Callbacks support token-wise streaming\n",
    "callbacks = [StreamingStdOutCallbackHandler()]\n",
    "\n",
    "# Verbose is required to pass to the callback manager\n",
    "llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
